***neighborhood-level social capital and depression after the outbreak of the COVID-19 pandemic: the mediating role of confidence in future***

* Session setup: turn off output paging, empty memory, load the merged
* CFPS 2018 + 2020 depression file, and make the output folder the working directory.
set more off
clear
use "D:\Data\cfps_20+2018_depression"
cd "D:\Output"

**preparing dataset**
* Inspect the survey waves and discard rows that carry no wave identifier.
tabulate year
drop if year == .

* Consecutive wave index (1 = 2018, 2 = 2020) so that one lag equals one wave;
* any other year is left missing.
generate time = cond(year == 2018, 1, cond(year == 2020, 2, .))
tabulate time year

* Declare the person-by-wave panel, then copy each respondent's previous-wave
* answers (the eight CES-D items and qn12016) into lag_ variables.
xtset pid time
foreach item of varlist qn406 qn407 qn411 qn412 qn414 qn416 qn418 qn420 qn12016 {
	generate lag_`item' = L.`item'
}
tab1 lag_qn406-lag_qn420 qn12016
tabulate lag_qn406 time

* The wave index is only needed for the lag operator; afterwards keep the
* 2020 rows only (the 2018 answers now live in the lag_ variables).
drop time
drop if year == 2018

*data analysis*
*demographic*
* Gender: distribution with value labels, then with the underlying codes.
tabulate gender
tabulate gender, nolabel

* Age: collapse into six bands; values from -8 to 15 are set to missing.
tabulate age
codebook age
recode age (-8/15 = .) ///
    (16/29 = 1 "16-29") ///
    (30/39 = 2 "30-39") ///
    (40/49 = 3 "40-49") ///
    (50/59 = 4 "50-59") ///
    (60/69 = 5 "60-69") ///
    (70/104 = 6 "70 or above"), generate(age_group)
tabulate age_group

* Keep adults only.
drop if age < 16

* Ethnicity: code 1 is labelled Han, codes 2 to 79 are pooled as non-Han, -8 is missing.
tabulate ethnicity
tabulate ethnicity, nolabel
recode ethnicity (-8 = .) (2/79 = 0 "non-Han") (1 = 1 "Han"), generate(eth_new)
tabulate eth_new

* Marital status: codes 2 and 3 form the married group (1); codes 1, 4 and 5 form
* the never married / divorced / widowed group (0); -8 is missing.
tabulate marriage
codebook marriage
recode marriage (2/3 = 1 "已婚") (1 = 0 "未婚/离婚/丧偶") (4/5 = 0) (-8 = .), generate(mrrge_new)
tabulate mrrge_new

*socioeconomic*
* Education (edu is taken from the 2020 release of the CFPS cross-year database).
tabulate edu
tabulate edu, nolabel
recode edu (-9/-8 = .) ///
    (1 = 1 "no formal education (Illiterate /semi-literate") ///
    (2 = 2 "primary school") ///
    (3 = 3 "junior high school") ///
    (4 = 4 "senior high school") ///
    (5/8 = 5 "college or above"), generate(edu_new)
tabulate edu_new

* Employment (employ is taken from the cross-year core-variable database).
tabulate employ
codebook employ

* Start from the non-negative employ codes; everything else stays missing for now.
generate new_employ = employ if employ >= 0 & employ != .

tabulate qc4
codebook qc4
* Where employ is negative or missing, fall back on qc4:
* full-time students (qc4 == 1) are coded 4, students who also work (qc4 == 5) are coded 1.
replace new_employ = 4 if qc4 == 1 & (employ < 0 | employ == .)
replace new_employ = 1 if qc4 == 5 & (employ < 0 | employ == .)

* Binary employment indicator: 1 = employed, 0 = codes 0, 3 and 4.
recode new_employ (1 = 1 "employed") (0 = 0 "unemployed/students/not in labor market") (3/4 = 0), generate(new_employ_20)
tabulate new_employ_20

* Family size.
summarize familysize

* Household income.
summarize fincome1
codebook fincome1

* Equivalised household income: income divided by the square root of family
* size, then log-transformed after adding 1.
generate eq_hhd_inc = fincome1 / sqrt(familysize)
summarize eq_hhd_inc
histogram eq_hhd_inc
generate ln_eq_hhd_inc = ln(eq_hhd_inc + 1)
summarize ln_eq_hhd_inc
histogram ln_eq_hhd_inc

*lifestyle*
* Smoking in the last month: negative codes become missing.
tabulate qq201
tabulate qq201, nolabel
tabulate qq201 year
recode qq201 (1 = 1 "Yes") (0 = 0 "No") (-9/-1 = .), generate(smoking)
tabulate smoking

* Drinking more than 3 times per week in the last month: negative codes become missing.
tabulate qq301
tabulate qq301, nolabel
tabulate qq301 year, nolabel
recode qq301 (1 = 1 "Yes") (0 = 0 "No") (-9/-1 = .), generate(drinking)
tabulate drinking, missing

* Exercise frequency (2020): code 8 becomes the lowest category, codes 1-2 and
* 5-7 are pooled, negative codes become missing.
tabulate qp701n
tabulate qp701n, nolabel
recode qp701n (-9/-1 = .) ///
    (8 = 1 "never") ///
    (1/2 = 2 "less than once per week") ///
    (3 = 3 "once or twice per week") ///
    (4 = 4 "3-4 times per week") ///
    (5/7 = 5 "5 times or more per week"), generate(exercise)
tabulate exercise

*rural-urban*
* Recode in place: -9 and -8 become missing.
tabulate urban
tabulate urban, nolabel
recode urban (-9/-8 = .)
tabulate urban

*trust in neighbors*
* Copy qn10022 into newqn10022 with the codes -8 to -1 set to missing.
tabulate qn10022
tabulate qn10022, nolabel
recode qn10022 (-8/-1 = .), prefix(new)
tabulate newqn10022

*vote
* Binary indicator from qn7: 1 stays 1 (yes), 5 becomes 0 (no), -8 to -1 become missing.
tabulate qn7
tabulate qn7, nolabel
recode qn7 (5 = 0 "no") (1 = 1 "yes") (-8/-1 = .), generate(vote)
tabulate vote

*cesd-8*
* Current-wave and lagged CES-D items: shift the 1-4 answers to 0-3 and set
* negative codes to missing; results are stored in new-prefixed copies.
tab1 qn406 qn407 qn411 qn412 qn414 qn416 qn418 qn420 lag_qn406-lag_qn420
recode qn406 qn407 qn411 qn412 qn414 qn416 qn418 qn420 lag_qn406-lag_qn420 ///
    (-9/-1 = .) (1 = 0) (2 = 1) (3 = 2) (4 = 3), prefix(new)

* Reverse scoring for items 412 and 416, in both waves.
recode newqn412 newqn416 newlag_qn412 newlag_qn416 (0 = 3) (1 = 2) (2 = 1) (3 = 0)

* Sum scores (0-24); a missing item makes the whole score missing.
generate cesd8_new = newqn406 + newqn407 + newqn411 + newqn412 ///
    + newqn414 + newqn416 + newqn418 + newqn420
summarize cesd8_new

generate lag_cesd8_new = newlag_qn406 + newlag_qn407 + newlag_qn411 + newlag_qn412 ///
    + newlag_qn414 + newlag_qn416 + newlag_qn418 + newlag_qn420
summarize lag_cesd8_new

* Dichotomised scores with 9 as the cutoff.
* Cutoff source: https://doi.org/10.1007/s41999-017-0016-0
recode cesd8_new (0/8 = 0 "non-depressed") (9/24 = 1 "depressed"), generate(cesd8_dummy)
tabulate cesd8_dummy

recode lag_cesd8_new (0/8 = 0 "non-depressed") (9/24 = 1 "depressed"), generate(lag_cesd8_dummy)
tabulate lag_cesd8_dummy

*physical discomfort during the past two weeks
* Binary indicator from qp301; codes -8 to -1 become missing.
tabulate qp301
recode qp301 (1 = 1 "Yes") (0 = 0 "No") (-8/-1 = .), generate(phy_hlth)
tabulate phy_hlth

*confidence about future
* Current (cfdc) and previous-wave (lag_cfdc) versions of qn12016, with the
* codes -8 to -1 set to missing.
tab1 qn12016 lag_qn12016
tab1 qn12016 lag_qn12016, nolabel
recode qn12016 (-8/-1 = .), generate(cfdc)
recode lag_qn12016 (-8/-1 = .), generate(lag_cfdc)
tab1 cfdc lag_cfdc

*cid*
* First pass: number of respondents per community (cid20), counted BEFORE
* listwise deletion.
ta cid20
gen n=1
bys cid20: egen person_cid=total(n)
ta person_cid

* Flag communities with at least 25 respondents.
* NOTE(review): the upper bound 3914 is a hard-coded group size; it presumably
* screens out the block of respondents without a usable community id (see the
* value label below) -- confirm against the person_cid table whenever the data change.
gen cid_sample=0
replace cid_sample=1 if person_cid>24 & person_cid<3914
ta cid_sample
label values cid_sample cid_sample
label define cid_sample 1 ">=25 persons in cid" 0 "<25 persons in cid or cid missing"
ta cid_sample

ta subsample
ta cid_sample if subsample==1

* Export the missing-data pattern of the analysis variables for the flagged communities.
logout, save(missing_data) word replace: ///
mdesc gender age_group eth_new mrrge_new edu_new new_employ_20 smoking drinking exercise ln_eq_hhd_inc newqn10022 region urban cesd8_new lag_cesd8_new vote cid20 cfdc lag_cfdc phy_hlth cid20 if cid_sample==1

* Listwise deletion on every analysis variable.
* foreach replaces the out-of-date -for- command, and missing() also removes
* extended missing values (.a to .z), which the models would exclude anyway.
foreach v of varlist gender age_group eth_new mrrge_new edu_new new_employ_20 smoking drinking exercise ln_eq_hhd_inc newqn10022 region urban cesd8_new lag_cesd8_new vote cid20 cfdc lag_cfdc phy_hlth {
	drop if missing(`v')
}

*cid*
* Second pass: recount respondents per community on the complete-case sample
* and rebuild the size flag from scratch.
drop cid_sample n person_cid
ta cid20
gen n=1
bys cid20: egen person_cid=total(n)
ta person_cid

* NOTE(review): 3914 is the same hard-coded upper bound used before listwise
* deletion -- confirm it is still the intended cut on the reduced sample.
gen cid_sample=0
replace cid_sample=1 if person_cid>24 & person_cid<3914
ta cid_sample
label values cid_sample cid_sample
label define cid_sample 1 ">=25 persons in cid" 0 "<25 persons in cid or cid missing", replace
ta cid_sample

ta subsample
ta cid_sample if subsample==1

* Analysis sample: communities with at least 25 complete cases.
keep if cid_sample==1

* Community-level voting: number of respondents with vote==1 and that number
* as a percentage of the community's respondents.
bys cid20: egen vote_cid_persons=total(vote)
ta vote_cid_persons
bys cid20: gen vote_cid_proportion=(vote_cid_persons/person_cid)*100
ta vote_cid_proportion

* Community-level trust: mean of the individual neighbour-trust item.
bys cid20: egen trust_nghb_cid=mean(newqn10022)
* Full variable name (the abbreviation trust_nghb_ci breaks under -set varabbrev off-).
ta trust_nghb_cid

**sample characteristics with no imputation
* Frequency tables of the categorical variables, exported to Word.
logout, save(categorical) word replace: ///
  tab1 gender age_group eth_new mrrge_new edu_new new_employ_20 smoking drinking exercise phy_hlth lag_cesd8_dummy vote cmonth urban
  
* Summary statistics of the continuous variables, exported to Word.
logout, save(continuous) word replace: ///
  sum cesd8_new lag_cesd8_new ln_eq_hhd_inc cfdc lag_cfdc newqn10022
  
codebook cid20 /*209 cid*/
/**neighborhood-level description**/
* m numbers the respondents within each community; m==1 keeps one row per
* community so that each community counts once in the summary.
bys cid20:gen m=_n
* Full variable name (the abbreviation trust_nghb_ci breaks under -set varabbrev off-).
logout, save(neighborhood-level social capital) word replace: ///
  sum vote_cid_proportion trust_nghb_cid if m==1

* Individual-level covariates for the continuous-outcome models (includes the
* lagged CES-D sum score).
global ctrl "gender i.age_group eth_new mrrge_new i.edu_new new_employ_20 smoking drinking  i.exercise phy_hlth lag_cesd8_new"

**two-level mediation
* Right-hand side shared by the three models below; these two locals must be
* run together with the models that use them.
local socialcap vote newqn10022 vote_cid_proportion trust_nghb_cid ln_eq_hhd_inc
local covars $ctrl lag_cfdc i.cmonth urban

* a: CES-D score on the social-capital measures, random intercept for cid20.
mixed cesd8_new `socialcap' `covars' || cid20:
estimates store a

* b: confidence in the future (mediator) on the same predictors.
mixed cfdc `socialcap' `covars' || cid20:
estimates store b

* c: CES-D score with the mediator added.
mixed cesd8_new cfdc `socialcap' `covars' || cid20:
estimates store c

* Export the three models side by side.
esttab a b c using cesd_score_unimputed.rtf, ///
    ci star( + 0.1 * 0.05 ** 0.01 *** 0.001) r2 b(%4.2f) pr2 aic bic onecell replace

* Individual-level covariates for the binary-outcome models (includes the
* lagged depression dummy instead of the lagged sum score).
global ctrl2 "gender i.age_group eth_new mrrge_new i.edu_new new_employ_20 smoking drinking i.exercise phy_hlth lag_cesd8_dummy"

* a: depression dummy on the social-capital measures, random intercept for
* cid20. The model is fitted once and shown on the log-odds scale ...
melogit cesd8_dummy vote newqn10022 vote_cid_proportion trust_nghb_cid ln_eq_hhd_inc  ///
   $ctrl2 lag_cfdc i.cmonth urban if cid_sample==1 || cid20: 
  
* ... then replayed as odds ratios. Replaying avoids refitting the identical
* model a second time; the stored estimates are the same either way.
melogit, or 
est sto a 

* b: confidence in the future (mediator), linear mixed model.
mixed cfdc vote newqn10022 vote_cid_proportion trust_nghb_cid ln_eq_hhd_inc  ///
   $ctrl2 lag_cfdc i.cmonth urban if cid_sample==1 || cid20: 
est sto b

* c: depression dummy with the mediator added.
melogit cesd8_dummy cfdc vote newqn10022 vote_cid_proportion trust_nghb_cid ln_eq_hhd_inc  ///
   $ctrl2 lag_cfdc i.cmonth urban if cid_sample==1 || cid20:, or
est sto c

* Table on the coefficient (log-odds / linear) scale.
esttab a b c ///
      using cesd_dummy_coef_unimputed.rtf, ///
      ci star( + 0.1 * 0.05 ** 0.01 *** 0.001) r2 b(%4.2f) pr2 aic bic onecell replace
* Odds-ratio table: exponentiate the two logit models (a and c) only. Model b
* is a linear mixed model, so its coefficients are left on their original scale.
esttab a b c ///
      using cesd_dummy_or_unimputed.rtf, ///
      eform(1 0 1) ci star( + 0.1 * 0.05 ** 0.01 *** 0.001) r2 b(%4.2f) pr2 aic bic onecell replace
  
* Close any log left open by an earlier, aborted run; otherwise -log using-
* stops with "log file already open".
capture log close
log using "mediation and boostrap", replace

* Mediation model for the continuous outcome: one equation for the mediator
* (cfdc) and one for the CES-D score, each with its own cid20 random intercept
* (M1, M2) whose covariance is constrained to zero.
gsem (cfdc <- vote newqn10022 vote_cid_proportion trust_nghb_cid ln_eq_hhd_inc  ///
   $ctrl lag_cfdc i.cmonth urban  M1[cid20]) ///
    (cesd8_new <- cfdc vote newqn10022 vote_cid_proportion trust_nghb_cid ln_eq_hhd_inc  ///
   $ctrl lag_cfdc i.cmonth urban M2[cid20]), cov(M1[cid20]*M2[cid20]@0)

* Effects of community-level trust (the figures in the trailing comments are
* the results recorded by the original author).
* Indirect effect: path to cfdc times path from cfdc to the outcome.
nlcom _b[cfdc:trust_nghb_cid]*_b[cesd8_new:cfdc] /*-.0586564; p<0.001; [-.0892574   -.0280555]*/   
* Total effect: indirect plus direct.
nlcom _b[cfdc:trust_nghb_cid]*_b[cesd8_new:cfdc]+_b[cesd8_new:trust_nghb_cid]   /*-.4257985; p=0.001 [-.682513   -.1690839]*/
* Proportion mediated: indirect divided by total.
nlcom (_b[cfdc:trust_nghb_cid]*_b[cesd8_new:cfdc]) / (_b[cfdc:trust_nghb_cid]*_b[cesd8_new:cfdc]+_b[cesd8_new:trust_nghb_cid] ) /**13.77%**/

* bootmmcat: refits the continuous-outcome mediation gsem and returns, for
* community-level trust (trust_nghb_cid):
*   r(ind_eff)  indirect effect (path to cfdc times path from cfdc to cesd8_new)
*   r(dir_eff)  direct effect on cesd8_new
*   r(tot_eff)  indirect plus direct effect
* Any earlier definition is dropped first; without this, re-running the
* do-file in the same session stops with "bootmmcat already defined".
capture program drop bootmmcat
program bootmmcat, rclass
	gsem (cfdc <- vote newqn10022 vote_cid_proportion trust_nghb_cid ln_eq_hhd_inc  ///
	   $ctrl lag_cfdc i.cmonth urban  M1[cid20]) ///
	    (cesd8_new <- cfdc vote newqn10022 vote_cid_proportion trust_nghb_cid ln_eq_hhd_inc  ///
	   $ctrl lag_cfdc i.cmonth urban M2[cid20]) if cid_sample==1, cov(M1[cid20]*M2[cid20]@0)
	return scalar ind_eff = _b[cfdc:trust_nghb_cid]*_b[cesd8_new:cfdc] 
	return scalar dir_eff = _b[cesd8_new:trust_nghb_cid]
	return scalar tot_eff = _b[cfdc:trust_nghb_cid]*_b[cesd8_new:cfdc]+_b[cesd8_new:trust_nghb_cid]
end
* 1000 bootstrap replications with a fixed seed, then all available bootstrap
* confidence intervals.
* NOTE(review): rows are resampled individually here (no cluster() or
* idcluster() option) although the model has a cid20 random intercept --
* confirm that this resampling scheme is intended.
bootstrap r(ind_eff) r(dir_eff) r(tot_eff), seed(1234) reps(1000): bootmmcat  
estat boot, all

* Mediation model for the binary outcome: a linear equation for the mediator
* (cfdc) and a logit equation for the depression dummy, each with its own
* cid20 random intercept (M1, M2) whose covariance is constrained to zero.
gsem ///
    (cfdc <- vote newqn10022 vote_cid_proportion trust_nghb_cid ln_eq_hhd_inc ///
        $ctrl2 lag_cfdc i.cmonth urban M1[cid20]) ///
    (cesd8_dummy <- cfdc vote newqn10022 vote_cid_proportion trust_nghb_cid ln_eq_hhd_inc ///
        $ctrl2 lag_cfdc i.cmonth urban M2[cid20], logit) ///
    if cid_sample==1, cov(M1[cid20]*M2[cid20]@0)

* Effects of community-level trust; the figures quoted are the results
* recorded by the original author.
* Indirect effect. Recorded: -.0387182; p<0.001; [-.0580805    -.019356]
nlcom _b[cfdc:trust_nghb_cid]*_b[cesd8_dummy:cfdc]
* Total effect (indirect plus direct). Recorded: -.265546; 0.002; [-.4308176  -.1002744]
nlcom _b[cfdc:trust_nghb_cid]*_b[cesd8_dummy:cfdc] + _b[cesd8_dummy:trust_nghb_cid]
* Proportion mediated (indirect divided by total). Recorded: 14.58%
nlcom (_b[cfdc:trust_nghb_cid]*_b[cesd8_dummy:cfdc]) / ///
    (_b[cfdc:trust_nghb_cid]*_b[cesd8_dummy:cfdc] + _b[cesd8_dummy:trust_nghb_cid])

* bootmmcat2: refits the binary-outcome mediation gsem and returns, for
* community-level trust (trust_nghb_cid):
*   r(ind_eff)  indirect effect (path to cfdc times path from cfdc to cesd8_dummy)
*   r(dir_eff)  direct effect on cesd8_dummy
*   r(tot_eff)  indirect plus direct effect
* Any earlier definition is dropped first; without this, re-running the
* do-file in the same session stops with "bootmmcat2 already defined".
capture program drop bootmmcat2
program bootmmcat2, rclass
	gsem (cfdc <- vote newqn10022 vote_cid_proportion trust_nghb_cid ln_eq_hhd_inc  ///
	   $ctrl2 lag_cfdc i.cmonth urban  M1[cid20]) ///
	    (cesd8_dummy <- cfdc vote newqn10022 vote_cid_proportion trust_nghb_cid ln_eq_hhd_inc  ///
	   $ctrl2 lag_cfdc i.cmonth urban M2[cid20], logit) if cid_sample==1, cov(M1[cid20]*M2[cid20]@0)
	return scalar ind_eff = _b[cfdc:trust_nghb_cid]*_b[cesd8_dummy:cfdc] 
	return scalar dir_eff = _b[cesd8_dummy:trust_nghb_cid]
	return scalar tot_eff = _b[cfdc:trust_nghb_cid]*_b[cesd8_dummy:cfdc] + _b[cesd8_dummy:trust_nghb_cid]
end
* 1000 bootstrap replications with a fixed seed, then all available bootstrap
* confidence intervals.
* NOTE(review): rows are resampled individually here (no cluster() or
* idcluster() option) although the model has a cid20 random intercept --
* confirm that this resampling scheme is intended.
bootstrap r(ind_eff) r(dir_eff) r(tot_eff), seed(1234) reps(1000): bootmmcat2 
estat boot, all
log close
